################################################################################
#        CAS Data Engineering Modul 2 - "Crypto Gruppe"                        #
#        Time Series Analysis of Bitcoin Prices with R                         #
#         -> Statistical Tests and ARIMA Experiment <-                         #
#                                                                              #
#                         09 July 2021                                         #
#                                                                              #
################################################################################
# References: 
#
# SUMNER, T. 14/11/2019. Forecasting Bitcoin in R. 
# Available from: https://rstudio-pubs-static.s3.amazonaws.com/549884_39fa223876e448608b7a7fa79337feba.html
#
# JAQUART, P. DANN, D. WEINHARDT, Ch. Short-term bitcoin market prediction via machine learning
# Available online at www.sciencedirect.com
#
# URAS, N. MARCHESI, L. MARCHESI, M. TONELLI, R. Forecasting Bitcoin closing price series using linear regression and neural networks models
# Uras et al. (2020), PeerJ Comput. Sci., DOI 10.7717/peerj-cs.279
#
#
################################################################################


# Clear all objects from the global environment (attached packages stay loaded)
rm(list = ls())
# Load Libraries
library(foreign)
library(psych)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast  8.15     v expsmooth 2.3 
## v fma       2.4
## -- Conflicts ------------------------------------------------- fpp2_conflicts --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
library(astsa)
## 
## Attaching package: 'astsa'
## The following objects are masked from 'package:fma':
## 
##     chicken, sales
## The following object is masked from 'package:forecast':
## 
##     gas
## The following object is masked from 'package:fpp2':
## 
##     oil
## The following object is masked from 'package:psych':
## 
##     scatter.hist
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tseries)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(e1071)
# Initial configurations
# Register the Georgia font with the Windows graphics device so that the
# `family = 'Georgia'` settings in the plot theme can be resolved.
# `windowsFonts()` is only available on Windows builds of R, so the call is
# skipped on other platforms instead of aborting the whole script there.
if (.Platform$OS.type == "windows") {
  windowsFonts(Georgia = windowsFont("Georgia"))
}


## Shared plot theme: white panel with light-grey grid lines, a centred orange
## title and Georgia as the font family for all text
my_theme <- theme(
  panel.grid = element_line(color = "#e6e6e6"),
  panel.background = element_rect(fill = "white"),
  plot.title = element_text(hjust = 0.5, size = 28, colour = "#ffa500"),
  text = element_text(family = "Georgia"),
  axis.text = element_text(size = 10),
  axis.title = element_text(size = 18, family = "Georgia", face = "bold"),
  axis.line = element_line(colour = "#737373", size = 1),
  strip.background = element_rect(colour = "black", fill = "white"),
  strip.text = element_text(face = "bold")
)
# Read Data
# Load the daily Bitcoin price dataset from the working directory
bitcoin <- read_csv(file = "bitcoin_full_daily_prices.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   Date = col_date(format = ""),
##   sentiment_elon = col_character(),
##   sentiment_bitcoin = col_character()
## )
## i Use `spec()` for the full column specifications.
# List the columns that were read
colnames(bitcoin)
##  [1] "Date"                   "Open"                   "High"                  
##  [4] "Low"                    "Close"                  "WeightedPrice"         
##  [7] "Volume"                 "SMA_30"                 "EMA_40"                
## [10] "Altcoin_EMA_40"         "DASH"                   "DOGE"                  
## [13] "ETC"                    "ETH"                    "LTC"                   
## [16] "SC"                     "XEM"                    "XMR"                   
## [19] "XRP"                    "ZEC"                    "CLF"                   
## [22] "CNYUSDX"                "DJI"                    "EURUSDX"               
## [25] "GCF"                    "GSPC"                   "IXIC"                  
## [28] "JPYUSDX"                "TSLA"                   "VIX"                   
## [31] "XWDTO"                  "Cost_per_TR"            "Num_TR_per_Block"      
## [34] "Bu_Be_Spread_MA8"       "SMA_05"                 "SMA_90"                
## [37] "EMA_05"                 "EMA_90"                 "MACD"                  
## [40] "Avg_Dir_Mvmt"           "RSI"                    "Awesome_Osc"           
## [43] "ROC"                    "Stoch_RSI"              "Ultimate_Osc"          
## [46] "True_SI"                "Cum_Return"             "Log_Return"            
## [49] "Number_of_Transactions" "Active_Addresses"       "New_Addresses"         
## [52] "Hash_Rate"              "sentiment_elon"         "sentiment_bitcoin"
# Load the BTC daily returns dataset (the one also used in the Python modelling)
btc <- read_csv(file = "bitcoin_full_daily_returns.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   Date = col_date(format = ""),
##   sentiment_elon = col_character(),
##   sentiment_bitcoin = col_character()
## )
## i Use `spec()` for the full column specifications.
# List the columns that were read
colnames(btc)
##  [1] "Date"                   "WeightedPrice_return"   "DASH_return"           
##  [4] "DOGE_return"            "ETC_return"             "ETH_return"            
##  [7] "LTC_return"             "SC_return"              "XEM_return"            
## [10] "XMR_return"             "XRP_return"             "ZEC_return"            
## [13] "CLF_return"             "CNYUSDX_return"         "DJI_return"            
## [16] "EURUSDX_return"         "GCF_return"             "GSPC_return"           
## [19] "IXIC_return"            "JPYUSDX_return"         "TSLA_return"           
## [22] "VIX_return"             "XWDTO_return"           "Volume"                
## [25] "SMA_30"                 "EMA_40"                 "Altcoin_EMA_40"        
## [28] "Cost_per_TR"            "Num_TR_per_Block"       "Bu_Be_Spread_MA8"      
## [31] "SMA_05"                 "SMA_90"                 "EMA_05"                
## [34] "EMA_90"                 "MACD"                   "Avg_Dir_Mvmt"          
## [37] "RSI"                    "Awesome_Osc"            "ROC"                   
## [40] "Stoch_RSI"              "Ultimate_Osc"           "True_SI"               
## [43] "Cum_Return"             "Log_Return"             "Number_of_Transactions"
## [46] "Active_Addresses"       "New_Addresses"          "Hash_Rate"             
## [49] "sentiment_elon"         "sentiment_bitcoin"
# Create Time series Dataset
# Price series: weighted price for all dates strictly after 2017-01-01, sorted
# by date and passed to ts() as a one-column matrix
bit_ts <- ts(
  as.matrix(
    bitcoin %>%
      dplyr::filter(Date > as.Date("2017-01-01")) %>%
      arrange(Date) %>%
      select(WeightedPrice)
  )
)

# Return series: same date window and ordering, taken from the returns dataset
bit_ret_ts <- ts(
  as.matrix(
    btc %>%
      dplyr::filter(Date > as.Date("2017-01-01")) %>%
      arrange(Date) %>%
      select(WeightedPrice_return)
  )
)
# Plot BTC Prices (full Dataset)
# Interactive line chart of the weighted price over every row of `bitcoin`
ggplotly(
  bitcoin %>%
    ggplot(aes(x = Date, y = WeightedPrice)) +
    geom_line(colour = "#ffa500") +
    labs(title = "Bitcoin Weighted Prices 2014 -2021", x = "") +
    scale_y_continuous(
      breaks = c(0, 5000, 10000, 15000, 30000, 60000),
      labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
    ) +
    my_theme
)
# Plot BTC Prices after 2017
# Same chart as above, restricted to dates strictly after 2017-01-01
ggplotly(
  ggplot(
    data = dplyr::filter(bitcoin, Date > as.Date("2017-01-01")),
    mapping = aes(x = Date, y = WeightedPrice)
  ) +
    geom_line(colour = "#ffa500") +
    labs(title = "Bitcoin Weighted Prices after 2017", x = "") +
    scale_y_continuous(
      breaks = c(0, 5000, 10000, 15000, 30000, 60000),
      labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
    ) +
    my_theme
)
# Correlation plots for BTC Prices & its lags
# Lag plots of the post-2017 price series, drawn as points only.
# `do.lines` is given as FALSE rather than `F`, because `F` is an ordinary
# variable that can be reassigned. The colour-bar breaks are observation
# indices labelled with the years 2017-2021; both axes use dollar tick labels.
gglagplot(bit_ts, do.lines = FALSE) + my_theme +
  scale_color_continuous(low = "#b37400", high = "#ffc04d", 
                         breaks = c(1, 366, 731, 1097, 1463), 
                         labels = c('2017', '2018', '2019', '2020', '2021')) + 
  scale_y_continuous(breaks = c(0, 5000, 10000, 15000, 30000, 60000), 
                     labels = c('$0', '$5,000', '$10,000', '$15,000', 
                                '$30,000', '$60,000')) +
  scale_x_continuous(breaks = c(5000, 10000, 15000, 30000, 60000), 
                     labels = c('$5,000', '$10,000', '$15,000', 
                                '$30,000', '$60,000'))

# ACF and PACF of the raw post-2017 price series, up to 200 lags
ggAcf(bit_ts, lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

ggPacf(bit_ts, lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# Autocorrelation (ACF) and Partial Autocorrelation (PACF) after differencing
# Both plots use the first difference of the post-2017 price series, up to
# 200 lags. (Title spelling corrected: "Difference".)
ggAcf(diff(bit_ts), lag.max = 200) + my_theme + 
  labs(title = 'ACF with First Difference' , y = 'Correlation') 

ggPacf(diff(bit_ts), lag.max = 200) + my_theme + 
  labs(title = 'PACF with First Difference', y = '')

# ACF and PACF of the post-2017 daily return series, up to 200 lags
ggAcf(bit_ret_ts, lag.max = 200) +
  my_theme +
  labs(title = "ACF with Daily Returns", y = "Correlation")

ggPacf(bit_ret_ts, lag.max = 200) +
  my_theme +
  labs(title = "PACF with Daily Returns", y = "")

# Plot First Difference after 2017
# Price rows strictly after 2017-01-01. `diff()` below operates on consecutive
# rows, so the rows are sorted by date explicitly (as is done when building
# `bit_ts`) instead of relying on the row order of the CSV file.
cut_bit_df <- bitcoin %>%
  filter(Date > as.Date('2017-01-01')) %>%
  arrange(Date)

# Day-over-day price change. `diff()` returns one value fewer than its input,
# so the first row is dropped to keep each difference on the later date.
ggplotly(cut_bit_df[-1, ] %>%
           mutate(WeightedPrice = diff(cut_bit_df$WeightedPrice)) %>%
           ggplot(aes(Date, WeightedPrice)) + geom_line(col = '#ffa500') + 
           my_theme + labs(x = '', title = 'Bitcoin Differenced By One', 
                           y = 'Difference'))
# Plot Daily Returns after 2017
# Return rows strictly after 2017-01-01
cut_bit_ret_df <- btc %>%
  filter(Date > as.Date('2017-01-01'))

# `WeightedPrice_return` is already the daily return, so it is plotted as is.
# No `diff()` here: differencing this column would chart the day-over-day
# change of the return under a "Daily Returns" title.
# The first row is skipped so the chart covers the same dates as the
# differenced price chart.
ggplotly(cut_bit_ret_df[-1, ] %>%
           ggplot(aes(Date, WeightedPrice_return)) + geom_line(col = '#ffa500') + 
           my_theme + labs(x = '', title = 'Bitcoin Daily Returns', 
                           y = '% Returns'))
# Box-Cox Normalization of Bitcoin Prices
# Estimated Box-Cox lambda for the post-2017 price series
BoxCox.lambda(bit_ts)
## [1] -0.03727455
# Interactive chart of the post-2017 price after a Box-Cox transform that uses
# the lambda estimated from that same price column
ggplotly(
  ggplot(
    data = mutate(
      cut_bit_df,
      WeightedPrice = BoxCox(WeightedPrice, lambda = BoxCox.lambda(WeightedPrice))
    ),
    mapping = aes(x = Date, y = WeightedPrice)
  ) +
    geom_line(colour = "#ffa500") +
    my_theme +
    labs(x = "", title = "Bitcoin Box-Cox transformed", y = "BTC Price Transformed")
)
# Plot First Difference, Transformed First Difference & Daily Returns
# All three static charts drop the first post-2017 row so they share one date
# range (a first difference has one value fewer than its input).

## First difference of the original price
ggplot(
  data = mutate(
    cut_bit_df[-1, ],
    WeightedPrice = diff(cut_bit_df[["WeightedPrice"]])
  ),
  mapping = aes(x = Date, y = WeightedPrice)
) +
  geom_line(colour = "#650fba") +
  my_theme +
  labs(x = "", title = "Original BTC Price", y = "Difference")

## First difference of the Box-Cox transformed price
ggplot(
  data = mutate(
    cut_bit_df[-1, ],
    WeightedPrice = diff(
      BoxCox(
        cut_bit_df[["WeightedPrice"]],
        lambda = BoxCox.lambda(cut_bit_df[["WeightedPrice"]])
      )
    )
  ),
  mapping = aes(x = Date, y = WeightedPrice)
) +
  geom_line(colour = "#650fba") +
  my_theme +
  labs(x = "", title = "Transformed BTC Price", y = "")

## Daily returns, plotted as stored in the returns dataset
ggplot(
  data = cut_bit_ret_df[-1, ],
  mapping = aes(x = Date, y = WeightedPrice_return)
) +
  geom_line(colour = "#650fba") +
  my_theme +
  labs(x = "", title = "Daily Returns of BTC Price", y = "")

# ACF and PACF of the differenced Box-Cox transformed prices
# The transform uses the lambda estimated from the price series itself
bit_ts_tran <- BoxCox(bit_ts, lambda = BoxCox.lambda(bit_ts))

ggAcf(diff(bit_ts_tran), lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

ggPacf(diff(bit_ts_tran), lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# Test for Stationarity
# Augmented Dickey-Fuller test on three versions of the post-2017 series.
# The test's alternative hypothesis is "stationary" (see the printed output),
# so a small p-value is evidence for stationarity.
# Daily Prices Dataset
adf.test(bit_ts) # p-value < 0.05 indicates the TS is stationary
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bit_ts
## Dickey-Fuller = -1.6865, Lag order = 11, p-value = 0.711
## alternative hypothesis: stationary
# Result: p = 0.711 -> no evidence that the raw prices are stationary
# Box-Cox Transformed Dataset
adf.test(bit_ts_tran) # p-value < 0.05 indicates the TS is stationary
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bit_ts_tran
## Dickey-Fuller = -2.1671, Lag order = 11, p-value = 0.5076
## alternative hypothesis: stationary
# Result: p = 0.5076 -> the Box-Cox transform alone does not give stationarity
# Daily Returns Dataset
adf.test(bit_ret_ts) # p-value < 0.05 indicates the TS is stationary
## Warning in adf.test(bit_ret_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bit_ret_ts
## Dickey-Fuller = -10.475, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
# Result: p <= 0.01 (see warning) -> the daily returns test as stationary
# Test for Homoskedasticity
# Original Dataset
# Linear model of the weighted price on the 46 remaining numeric columns of
# `bitcoin`; fitted only so that its residuals can be checked for constant
# variance below.
lmMod_orig <- lm(
  WeightedPrice ~
    Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 +
    DASH + DOGE + ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC +
    CLF + CNYUSDX + DJI + EURUSDX + GCF + GSPC + IXIC + JPYUSDX +
    TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 +
    SMA_05 + SMA_90 + EMA_05 + EMA_90 +
    MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = bitcoin
)

# Show the lm diagnostic plots on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(lmMod_orig)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Breusch Pagan Test
# Studentized Breusch-Pagan test on the price model's residuals
lmtest::bptest(lmMod_orig)
## 
##  studentized Breusch-Pagan test
## 
## data:  lmMod_orig
## BP = 975.99, df = 46, p-value < 2.2e-16
# Result: p < 2.2e-16 -> constant residual variance is rejected
# NCV Test
# Score test for non-constant variance against the fitted values
car::ncvTest(lmMod_orig)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 12287.19, Df = 1, p = < 2.22e-16
# Result: same conclusion - the residuals are heteroskedastic
# Transformed Dataset (Daily returns)
# Linear model of the daily BTC return on the 46 remaining numeric columns of
# `btc`; fitted only so that its residuals can be checked for constant
# variance below.
lmMod_ret <- lm(
  WeightedPrice_return ~
    DASH_return + DOGE_return + ETC_return + ETH_return + LTC_return +
    SC_return + XEM_return + XMR_return + XRP_return + ZEC_return +
    CLF_return + CNYUSDX_return + DJI_return + EURUSDX_return + GCF_return +
    GSPC_return + IXIC_return + JPYUSDX_return + TSLA_return + VIX_return +
    XWDTO_return +
    Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 +
    SMA_05 + SMA_90 + EMA_05 + EMA_90 +
    MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = btc
)

# Show the lm diagnostic plots on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(lmMod_ret)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Breusch Pagan Test
# Studentized Breusch-Pagan test on the returns model's residuals
lmtest::bptest(lmMod_ret)
## 
##  studentized Breusch-Pagan test
## 
## data:  lmMod_ret
## BP = 488.71, df = 46, p-value < 2.2e-16
# Result: p < 2.2e-16 -> constant residual variance is rejected for returns too
# NCV Test
# Score test for non-constant variance against the fitted values
car::ncvTest(lmMod_ret)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 419.7224, Df = 1, p = < 2.22e-16
# Result: same conclusion - the residuals are heteroskedastic
# Transform Dataset with Box-Cox and check the heteroskedasticity again
# Estimate the Box-Cox transformation for the full weighted price column
distBCMod <- caret::BoxCoxTrans(y = bitcoin[["WeightedPrice"]])
print(distBCMod)
## Box-Cox Transformation
## 
## 2655 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   203.3   472.8  3765.1  7048.8  8851.4 63170.1 
## 
## Largest/Smallest: 311 
## Sample Skewness: 3.07 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations
# Append the Box-Cox transformed price as column `dist_new`, then preview the
# first rows of the extended table
bitcoin <- cbind(
  bitcoin,
  dist_new = predict(distBCMod, newdata = bitcoin[["WeightedPrice"]])
)
head(bitcoin, n = 6L)
##         Date     Open     High      Low    Close WeightedPrice     Volume
## 1 2014-02-26 530.8922 612.2200 529.3997 581.3204      572.4043  87282.479
## 2 2014-02-27 585.5042 599.6675 565.1850 578.7925      578.9220  27925.863
## 3 2014-02-28 578.0178 585.7325 542.8936 549.0490      563.2594  38993.570
## 4 2014-03-01 547.9225 576.7250 534.2375 562.1650      556.7110  21900.142
## 5 2014-03-02 561.7647 568.8898 551.1686 562.0330      560.7188   9532.377
## 6 2014-03-03 565.1014 699.7575 561.4364 667.0159      619.0314 115915.276
##     SMA_30   EMA_40 Altcoin_EMA_40      DASH         DOGE       ETC      ETH
## 1 682.5656 677.7405       9.100173 0.6414306 0.0010990163 0.8856109 10.07716
## 2 675.6139 672.9201       8.901130 0.6356737 0.0011578439 0.8856109 10.07716
## 3 667.7523 667.5708       8.698592 0.5244001 0.0010701929 0.8856109 10.07716
## 4 659.7758 662.1630       8.506256 0.8654350 0.0010410495 0.8856109 10.07716
## 5 651.6469 657.2145       8.324678 0.9219676 0.0009868652 0.8856109 10.07716
## 6 644.9606 655.3519       8.155427 1.0897676 0.0010275921 0.8856109 10.07716
##        LTC           SC         XEM      XMR         XRP      ZEC    CLF
## 1 14.64462 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 2 14.42255 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.40
## 3 13.72179 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 4 13.40066 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 5 13.42876 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 6 13.47429 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 104.92
##     CNYUSDX      DJI  EURUSDX    GCF    GSPC    IXIC     JPYUSDX   TSLA   VIX
## 1 0.1635430 16198.41 1.374608 1328.2 1845.16 4292.06 0.009789525 50.600 14.35
## 2 0.1635377 16272.65 1.368195 1331.6 1854.29 4318.93 0.009775266 50.508 14.04
## 3 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 4 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 5 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 6 0.1629965 16168.03 1.377695 1350.1 1845.73 4277.30 0.009871181 50.112 16.00
##      XWDTO Cost_per_TR Num_TR_per_Block Bu_Be_Spread_MA8   SMA_05   SMA_90
## 1 30.02990    40.68028         395.0385        0.1143584 563.3494 613.6189
## 2 30.19822    38.02965         460.0000        0.1152040 562.1636 613.6189
## 3 30.02990    33.50832         442.0000        0.1143583 551.7505 613.6189
## 4 30.02990    33.50832         442.0000        0.1143583 551.9148 613.6189
## 5 30.02990    33.50832         442.0000        0.1143583 566.4031 613.6189
## 6 29.70214    27.54891         373.0000        0.1118211 575.7285 613.6189
##     EMA_05   EMA_90      MACD Avg_Dir_Mvmt      RSI Awesome_Osc        ROC
## 1 559.4679 751.0339 -53.84000     75.87720 38.58185  -132.17401  -5.829356
## 2 565.9526 747.2512 -51.12634     74.32864 38.26849  -126.10740  -7.487245
## 3 565.0548 743.2074 -50.79031     73.04248 34.69747  -129.37471 -11.904074
## 4 562.2736 739.1086 -48.90195     71.90756 37.46850  -122.92658  -9.185845
## 5 561.7553 735.1879 -46.87571     70.85371 37.45128   -99.55943   2.320085
## 6 580.8473 732.6350 -36.37930     67.06642 55.12140   -83.06932  16.783975
##   Stoch_RSI Ultimate_Osc   True_SI Cum_Return  Log_Return
## 1 1.0000000     54.11417 -38.66634  -30.25103  9.05408162
## 2 0.9818706     58.71632 -36.37036  -30.55432 -0.43578758
## 3 0.7752709     53.57958 -35.54387  -34.12306 -5.27563417
## 4 0.9355880     52.17020 -34.01388  -32.54936  2.36076239
## 5 0.9345913     54.50800 -32.80567  -32.56520 -0.02348741
## 6 1.0000000     63.02288 -25.11566  -19.96896 17.12533192
##   Number_of_Transactions Active_Addresses New_Addresses Hash_Rate
## 1                  73176           201181        110785  28675.77
## 2                  70859           185098         95229  29478.51
## 3                  70290           196525         92184  31620.30
## 4                  63402           181719        100239  27215.37
## 5                  56635           159491         80905  26872.27
## 6                  81264           237371        123677  28209.56
##   sentiment_elon sentiment_bitcoin dist_new
## 1        neutral          positive 6.349846
## 2        neutral          positive 6.361168
## 3        neutral          positive 6.333740
## 4        neutral          positive 6.322046
## 5        neutral          positive 6.329220
## 6        neutral          positive 6.428156
# Original Dataset BoxCox Transformed
# Same predictors as the price model, but the response is the Box-Cox
# transformed price column `dist_new`
lmMod_bc <- lm(
  dist_new ~
    Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 +
    DASH + DOGE + ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC +
    CLF + CNYUSDX + DJI + EURUSDX + GCF + GSPC + IXIC + JPYUSDX +
    TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 +
    SMA_05 + SMA_90 + EMA_05 + EMA_90 +
    MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = bitcoin
)


# Repeat both heteroskedasticity tests on the Box-Cox price model
lmtest::bptest(lmMod_bc)
## 
##  studentized Breusch-Pagan test
## 
## data:  lmMod_bc
## BP = 758.13, df = 46, p-value < 2.2e-16
car::ncvTest(lmMod_bc)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 122.7901, Df = 1, p = < 2.22e-16
# Result: both tests still reject constant variance after the transformation
# Diagnostic plots; no par() call here, so the current plot layout is reused
plot(lmMod_bc)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Transform 2017+ Dataset with Box-Cox and check the heteroskedasticity again
# Estimate the Box-Cox transformation for the post-2017 weighted price only
distBCMod_2017p <- caret::BoxCoxTrans(y = cut_bit_df[["WeightedPrice"]])
print(distBCMod_2017p)
## Box-Cox Transformation
## 
## 1614 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   800.5  4518.7  7955.4 11310.2 10598.7 63170.1 
## 
## Largest/Smallest: 78.9 
## Sample Skewness: 2.54 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations
# Append the Box-Cox transformed price as column `dist_new_2017p`, then preview
# the first rows of the extended post-2017 table
cut_bit_df <- cbind(
  cut_bit_df,
  dist_new_2017p = predict(
    distBCMod_2017p,
    newdata = cut_bit_df[["WeightedPrice"]]
  )
)
head(cut_bit_df, n = 6L)
##         Date     Open      High       Low     Close WeightedPrice   Volume
## 1 2017-01-02 1000.834 1107.8520  989.0440 1023.5162     1034.6061 12317.03
## 2 2017-01-03 1015.385 1043.6383 1008.9160 1040.3600     1028.5963 12102.70
## 3 2017-01-04 1043.216 1160.4240 1033.1701 1142.4498     1109.4000 30923.99
## 4 2017-01-05 1137.100 1157.9446  904.4820 1015.5040     1028.4806 54107.88
## 5 2017-01-06 1004.590 1041.9672  888.2768  924.3226      941.5943 39906.16
## 6 2017-01-07  914.762  939.9091  824.6119  917.5961      884.8570 30801.34
##     SMA_30   EMA_40 Altcoin_EMA_40     DASH         DOGE      ETC       ETH
## 1 845.3696 844.5064       15.60276 11.90462 0.0002172673 1.435816  8.381841
## 2 854.0453 853.4864       15.26059 12.70047 0.0002160052 1.503736  9.343522
## 3 865.7015 865.9700       14.98043 15.76614 0.0002218800 1.650521 10.775181
## 4 874.5525 873.8973       14.68696 14.59271 0.0002159809 1.620495 10.274089
## 5 880.2861 877.1996       14.38358 13.29806 0.0002165667 1.534667 10.313075
## 6 883.9599 877.5732       14.07224 12.16846 0.0002212143 1.411719  9.964207
##        LTC           SC         XEM      XMR         XRP      ZEC   CLF
## 1 4.672416 0.0002379594 0.003424546 15.59237 0.006404212 50.19510 52.33
## 2 4.579383 0.0002365772 0.003476656 16.16504 0.006408155 49.69577 52.33
## 3 4.620385 0.0002662560 0.003505704 17.40406 0.006645306 53.87401 53.26
## 4 4.450503 0.0002674050 0.003373416 16.45925 0.006006327 50.63759 53.76
## 5 4.095577 0.0002542305 0.003361492 14.93277 0.006449921 48.43582 53.99
## 6 3.908838 0.0002654571 0.003318214 12.92683 0.006406365 46.92434 53.99
##     CNYUSDX      DJI  EURUSDX    GCF    GSPC    IXIC     JPYUSDX   TSLA   VIX
## 1 0.1440134 19881.76 1.052698 1160.4 2257.83 5429.08 0.008562011 43.398 12.85
## 2 0.1440092 19881.76 1.046003 1160.4 2257.83 5429.08 0.008511000 43.398 12.85
## 3 0.1436823 19942.16 1.041992 1163.8 2270.75 5477.00 0.008499137 45.398 11.85
## 4 0.1444023 19899.29 1.050089 1179.7 2269.00 5487.94 0.008538762 45.350 11.67
## 5 0.1451821 19963.80 1.060592 1171.9 2276.98 5521.06 0.008675661 45.802 11.32
## 6 0.1451821 19963.80 1.060592 1171.9 2276.98 5521.06 0.008675661 45.802 11.32
##      XWDTO Cost_per_TR Num_TR_per_Block Bu_Be_Spread_MA8    SMA_05   SMA_90
## 1 40.29515    7.350606         1829.881        0.1836159  981.8926 746.8906
## 2 40.29515    7.410000         1839.410        0.1856546  993.8454 751.1161
## 3 40.35140    7.300000         2143.920        0.1876934 1024.6242 756.4615
## 4 40.27640    6.913398         2060.721        0.1897321 1038.3718 761.0021
## 5 40.31391    5.470000         2151.580        0.1891224 1028.5355 764.5755
## 6 40.31391    5.470000         2151.580        0.1891224  998.5857 767.5439
##      EMA_05   EMA_90     MACD Avg_Dir_Mvmt      RSI Awesome_Osc       ROC
## 1  986.7365 769.8161 58.98192     62.13294 83.87084    148.7609 10.184276
## 2 1000.6898 775.5036 62.40263     63.33110 85.06319    152.1394 16.116281
## 3 1036.9265 782.8420 72.51545     64.91275 89.92477    170.4634 27.171635
## 4 1034.1112 788.2407 69.48548     62.80727 62.62813    177.2781 12.394167
## 5 1003.2722 791.6111 59.04598     60.51073 50.71916    167.9914 -1.820025
## 6  963.8005 793.6604 49.65741     57.16568 49.96436    131.3790 -5.868097
##   Stoch_RSI Ultimate_Osc  True_SI Cum_Return  Log_Return Number_of_Transactions
## 1 0.6519728     62.30141 60.70143   22.80527   3.1831595                 290951
## 2 0.6588502     62.12566 62.42697   24.82625   1.6322853                 301664
## 3 0.9919971     65.90169 66.36453   37.07537   9.3608097                 328642
## 4 0.0000000     59.10028 55.77444   21.84394 -11.7789863                 288501
## 5 0.0000000     52.07252 41.69816   10.90365  -9.4079175                 346405
## 6 0.0000000     55.31741 32.33093   10.09658  -0.7303834                 282060
##   Active_Addresses New_Addresses Hash_Rate sentiment_elon sentiment_bitcoin
## 1           706004        350945   2514432        neutral          positive
## 2           656631        363271   2590800        neutral          positive
## 3           699023        394821   2490282        neutral          positive
## 4           653187        352053   2204094        neutral          positive
## 5           719666        400985   2579718       positive          positive
## 6           649279        333774   2202326       positive          positive
##   dist_new_2017p
## 1       6.941776
## 2       6.935950
## 3       7.011575
## 4       6.935838
## 5       6.847574
## 6       6.785426
# 2017+ Dataset BoxCox Transformed
# Same predictors as the price model, fitted on the post-2017 subset with the
# Box-Cox transformed price column `dist_new_2017p` as response
lmMod_bc_2017p <- lm(
  dist_new_2017p ~
    Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 +
    DASH + DOGE + ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC +
    CLF + CNYUSDX + DJI + EURUSDX + GCF + GSPC + IXIC + JPYUSDX +
    TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 +
    SMA_05 + SMA_90 + EMA_05 + EMA_90 +
    MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = cut_bit_df
)


# Repeat both heteroskedasticity tests on the post-2017 Box-Cox price model
lmtest::bptest(lmMod_bc_2017p)
## 
##  studentized Breusch-Pagan test
## 
## data:  lmMod_bc_2017p
## BP = 309.24, df = 46, p-value < 2.2e-16
car::ncvTest(lmMod_bc_2017p)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 29.24724, Df = 1, p = 6.3707e-08
# Result: both tests still reject constant variance on the 2017+ subset
# Diagnostic plots; no par() call here, so the current plot layout is reused
plot(lmMod_bc_2017p)

# Transform Returns Dataset with Box-Cox and check the heteroskedasticity again
# Estimate the Box-Cox transformation for the daily return column of `btc`.
# NOTE(review): the `_2017p` in the object name suggests the 2017+ subset, but
# the input is the unfiltered returns column - confirm which one was intended.
distBCMod_2017p_ret <- caret::BoxCoxTrans(y = btc[["WeightedPrice_return"]])
print(distBCMod_2017p_ret)
## Box-Cox Transformation
## 
## 2654 data points used to estimate Lambda
## 
## Input data summary:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.227187 -0.011616  0.001398  0.002152  0.016413  0.412763 
## 
## Lambda could not be estimated; no transformation is applied
# Append the result of applying the fitted Box-Cox object to the returns as
# column `dist_new_2017p_ret`, then preview the first rows of the extended table
btc <- cbind(
  btc,
  dist_new_2017p_ret = predict(
    distBCMod_2017p_ret,
    newdata = btc[["WeightedPrice_return"]]
  )
)
head(btc, n = 6L)
##         Date WeightedPrice_return  DASH_return DOGE_return ETC_return
## 1 2014-02-27          0.011386392 -0.008975094  0.05352749          0
## 2 2014-02-28         -0.027054696 -0.175048216 -0.07570196          0
## 3 2014-03-01         -0.011625969  0.650333348 -0.02723187          0
## 4 2014-03-02          0.007199204  0.065322674 -0.05204781          0
## 5 2014-03-03          0.103996044  0.182002114  0.04126900          0
## 6 2014-03-04          0.088578535  0.032654424  0.01644381          0
##   ETH_return   LTC_return SC_return XEM_return XMR_return XRP_return ZEC_return
## 1          0 -0.015164062         0          0          0          0          0
## 2          0 -0.048587554         0          0          0          0          0
## 3          0 -0.023403005         0          0          0          0          0
## 4          0  0.002096915         0          0          0          0          0
## 5          0  0.003390253         0          0          0          0          0
## 6          0  0.249238757         0          0          0          0          0
##     CLF_return CNYUSDX_return   DJI_return EURUSDX_return   GCF_return
## 1 -0.001851982  -3.261895e-05  0.004583180   -0.004665577  0.002559874
## 2  0.001855418  -5.884376e-04  0.003014848    0.001864209 -0.007659921
## 3  0.000000000   0.000000e+00  0.000000000    0.000000000  0.000000000
## 4  0.000000000   0.000000e+00  0.000000000    0.000000000  0.000000000
## 5  0.022711784  -2.722010e-03 -0.009415661    0.005069901  0.021719351
## 6 -0.015154369  -1.955477e-04  0.014092662   -0.002898517 -0.009110382
##    GSPC_return  IXIC_return JPYUSDX_return  TSLA_return  VIX_return
## 1  0.004948083  0.006260424   -0.001456506 -0.001818163 -0.02160282
## 2  0.002782689 -0.002502948    0.001850968 -0.030608966 -0.00284900
## 3  0.000000000  0.000000000    0.000000000  0.000000000  0.00000000
## 4  0.000000000  0.000000000    0.000000000  0.000000000  0.00000000
## 5 -0.007378510 -0.007154005    0.007946333  0.023487555  0.14285714
## 6  0.015267701  0.017457371   -0.001055092  0.017081717 -0.11874998
##   XWDTO_return     Volume   SMA_30   EMA_40 Altcoin_EMA_40 Cost_per_TR
## 1   0.00560488  27925.863 675.6139 672.9201       8.901130    38.02965
## 2  -0.00557364  38993.570 667.7523 667.5708       8.698592    33.50832
## 3   0.00000000  21900.142 659.7758 662.1630       8.506256    33.50832
## 4   0.00000000   9532.377 651.6469 657.2145       8.324678    33.50832
## 5  -0.01091454 115915.276 644.9606 655.3519       8.155427    27.54891
## 6   0.02028046  53101.347 639.9936 656.2550       8.049617    33.87386
##   Num_TR_per_Block Bu_Be_Spread_MA8   SMA_05   SMA_90   EMA_05   EMA_90
## 1         460.0000        0.1152040 562.1636 613.6189 565.9526 747.2512
## 2         442.0000        0.1143583 551.7505 613.6189 565.0548 743.2074
## 3         442.0000        0.1143583 551.9148 613.6189 562.2736 739.1086
## 4         442.0000        0.1143583 566.4031 613.6189 561.7553 735.1879
## 5         373.0000        0.1118211 575.7285 613.6189 580.8473 732.6350
## 6         567.7852        0.1109754 594.7170 613.6189 611.8530 731.3434
##        MACD Avg_Dir_Mvmt      RSI Awesome_Osc        ROC Stoch_RSI Ultimate_Osc
## 1 -51.12634     74.32864 38.26849  -126.10740  -7.487245 0.9818706     58.71632
## 2 -50.79031     73.04248 34.69747  -129.37471 -11.904074 0.7752709     53.57958
## 3 -48.90195     71.90756 37.46850  -122.92658  -9.185845 0.9355880     52.17020
## 4 -46.87571     70.85371 37.45128   -99.55943   2.320085 0.9345913     54.50800
## 5 -36.37930     67.06642 55.12140   -83.06932  16.783975 1.0000000     63.02288
## 6 -27.73094     63.54965 55.13786   -61.48890  10.358037 1.0000000     56.80929
##     True_SI Cum_Return  Log_Return Number_of_Transactions Active_Addresses
## 1 -36.37036  -30.55432 -0.43578758                  70859           185098
## 2 -35.54387  -34.12306 -5.27563417                  70290           196525
## 3 -34.01388  -32.54936  2.36076239                  63402           181719
## 4 -32.80567  -32.56520 -0.02348741                  56635           159491
## 5 -25.11566  -19.96896 17.12533192                  81264           237371
## 6 -19.33400  -19.95377  0.01898200                  84600           232480
##   New_Addresses Hash_Rate sentiment_elon sentiment_bitcoin dist_new_2017p_ret
## 1         95229  29478.51        neutral          positive        0.011386392
## 2         92184  31620.30        neutral          positive       -0.027054696
## 3        100239  27215.37        neutral          positive       -0.011625969
## 4         80905  26872.27        neutral          positive        0.007199204
## 5        123677  28209.56        neutral          positive        0.103996044
## 6        132656  28401.79        neutral          positive        0.088578535
# Original Dataset BoxCox Transformed
# Linear model of dist_new_2017p_ret on all 46 regressors taken from `btc`.
# The fitted object feeds the heteroscedasticity checks that follow.
lmMod_bc_2017p_ret <- lm(
  formula = dist_new_2017p_ret ~
    # *_return regressors
    DASH_return + DOGE_return + ETC_return + ETH_return + LTC_return +
    SC_return + XEM_return + XMR_return + XRP_return + ZEC_return +
    CLF_return + CNYUSDX_return + DJI_return + EURUSDX_return + GCF_return +
    GSPC_return + IXIC_return + JPYUSDX_return + TSLA_return + VIX_return +
    XWDTO_return +
    # level / indicator regressors
    Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 + Cost_per_TR +
    Num_TR_per_Block + Bu_Be_Spread_MA8 + SMA_05 + SMA_90 + EMA_05 + EMA_90 +
    MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC + Stoch_RSI + Ultimate_Osc +
    True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = btc
)


# Heteroscedasticity diagnostics for the linear model fitted above.
# 1) Studentized Breusch-Pagan test; the p-value below is far under 0.05,
#    so constant residual variance is rejected.
lmtest::bptest(lmMod_bc_2017p_ret)
## 
##  studentized Breusch-Pagan test
## 
## data:  lmMod_bc_2017p_ret
## BP = 488.71, df = 46, p-value < 2.2e-16
# 2) Non-constant variance score test with the variance modelled on the
#    fitted values; it leads to the same conclusion.
car::ncvTest(lmMod_bc_2017p_ret)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 419.7224, Df = 1, p = < 2.22e-16
# 3) Diagnostic plots of the lm object.
# NOTE(review): the NaN warnings below come from sqrt(crit * p * (1 - hh)/hh);
# presumably some observations have leverage hh equal to 1 - confirm.
plot(lmMod_bc_2017p_ret)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Try an Autoarima with transformed data
# Automatic order selection on the transformed price series (bit_ts_tran,
# defined earlier in the file); the selected model is shown in the output.
arima_tr <- auto.arima(bit_ts_tran)

# Residual diagnostics; prints the Ljung-Box test shown below.
checkresiduals(arima_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,1,1) with drift
## Q* = 8.1023, df = 5, p-value = 0.1507
## 
## Model df: 5.   Total lags used: 10
# Try an Autoarima with daily returns data
# Same procedure on the daily return series (bit_ret_ts, defined earlier).
arima_ret <- auto.arima(bit_ret_ts)

# Residual diagnostics for the return model.
checkresiduals(arima_ret)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,2) with non-zero mean
## Q* = 10.711, df = 6, p-value = 0.09773
## 
## Model df: 4.   Total lags used: 10
# Keep only observations from 2020-01-01 onwards
cut2_bit_df <- filter(cut_bit_df, Date >= ymd('2020-01-01'))

# Plot 1: Box-Cox transformed price level
ggplotly(
  ggplot(
    mutate(
      cut2_bit_df,
      WeightedPrice = BoxCox(
        cut2_bit_df$WeightedPrice,
        lambda = BoxCox.lambda(cut2_bit_df$WeightedPrice)
      )
    ),
    aes(Date, WeightedPrice)
  ) +
    geom_line(col = '#ffa500') +
    labs(title = 'Bitcoin', x = '', y = 'Price (Transformed)') +
    my_theme
)
# Plot 2: first difference of the transformed price; the first row is dropped
# because diff() returns one value fewer than its input
ggplotly(
  ggplot(
    mutate(
      cut2_bit_df[-1, ],
      WeightedPrice = diff(
        BoxCox(
          cut2_bit_df$WeightedPrice,
          lambda = BoxCox.lambda(cut2_bit_df$WeightedPrice)
        )
      )
    ),
    aes(Date, WeightedPrice)
  ) +
    geom_line(col = '#ffa500') +
    my_theme +
    labs(x = '', title = 'Transformed Price', y = 'Difference')
)
# ACF / PACF for the data from 2020-01-01 onwards
# Build a one-column time series of WeightedPrice, ordered by date
bit_ts2 <- ts(
  as.matrix(
    select(
      arrange(filter(bitcoin, Date >= as.Date('2020-01-01')), Date),
      WeightedPrice
    )
  )
)

# Box-Cox transform with the lambda estimated on this same series
bit_ts_tran2 <- BoxCox(bit_ts2, lambda = BoxCox.lambda(bit_ts2))

# Correlograms of the differenced, transformed series
ggAcf(diff(bit_ts_tran2), lag.max = 200) +
  my_theme +
  labs(title = 'ACF', y = 'Correlation')

ggPacf(diff(bit_ts_tran2), lag.max = 200) +
  my_theme +
  labs(title = 'PACF', y = '')

# Automatic ARIMA order selection for the 2020+ data, then residual checks
arima_2020_tr <- auto.arima(bit_ts_tran2)

checkresiduals(arima_2020_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 14.894, df = 8, p-value = 0.06123
## 
## Model df: 2.   Total lags used: 10
# Keep only observations from 2021-01-01 onwards
cut3_bit_df <- filter(cut_bit_df, Date >= ymd('2021-01-01'))

# Plot 1: Box-Cox transformed price level
ggplotly(
  ggplot(
    mutate(
      cut3_bit_df,
      WeightedPrice = BoxCox(
        cut3_bit_df$WeightedPrice,
        lambda = BoxCox.lambda(cut3_bit_df$WeightedPrice)
      )
    ),
    aes(Date, WeightedPrice)
  ) +
    geom_line(col = '#ffa500') +
    labs(title = 'Bitcoin', x = '', y = 'Price (Transformed)') +
    my_theme
)
# Plot 2: first difference of the transformed price; the first row is dropped
# because diff() returns one value fewer than its input
ggplotly(
  ggplot(
    mutate(
      cut3_bit_df[-1, ],
      WeightedPrice = diff(
        BoxCox(
          cut3_bit_df$WeightedPrice,
          lambda = BoxCox.lambda(cut3_bit_df$WeightedPrice)
        )
      )
    ),
    aes(Date, WeightedPrice)
  ) +
    geom_line(col = '#ffa500') +
    my_theme +
    labs(x = '', title = 'Transformed Price', y = 'Difference')
)
# ACF / PACF for the data from 2021-01-01 onwards
# Build a one-column time series of WeightedPrice, ordered by date
bit_ts3 <- bitcoin %>%
  filter(Date >= as.Date('2021-01-01')) %>%
  arrange(Date) %>%
  select(WeightedPrice) %>%
  as.matrix() %>%
  ts()

# Estimate lambda on the 2021 series itself. The previous version reused the
# lambda of bit_ts2 (the 2020+ series), a copy-paste slip that transformed the
# 2021 data with a parameter fitted to a different sample.
bit_ts_tran3 <- BoxCox(bit_ts3, lambda = BoxCox.lambda(bit_ts3))

# Correlograms of the differenced, transformed series
ggAcf(diff(bit_ts_tran3), lag.max = 200) + my_theme + labs(title = 'ACF',
                                                           y = 'Correlation')

ggPacf(diff(bit_ts_tran3), lag.max = 200) + my_theme + labs(title = 'PACF', y = '')

# Autoarima for Data only 2021
arima_2021_tr <- auto.arima(bit_ts_tran3)

checkresiduals(arima_2021_tr)

# NOTE: the output below was captured before the lambda fix above; re-run the
# script to refresh it.
## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 13.409, df = 10, p-value = 0.2017
## 
## Model df: 0.   Total lags used: 10
# ARIMA Model Fits
## Data from 2017-01-01 onwards
bit_ts_past_2017 <- ts(
  as.matrix(
    select(
      arrange(filter(bitcoin, Date >= as.Date('2017-01-01')), Date),
      WeightedPrice
    )
  )
)

# Random walk with drift, ARIMA(0,1,0), on the Box-Cox transformed prices;
# checkresiduals() prints the Ljung-Box test shown below
bit_ts_past_2017 %>%
  BoxCox(lambda = BoxCox.lambda(bit_ts_past_2017)) %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0) with drift
## Q* = 81.859, df = 9, p-value = 6.894e-14
## 
## Model df: 1.   Total lags used: 10
# NOTE(review): the summary refits the model on bit_ts_tran (defined earlier),
# not on bit_ts_past_2017 - presumably the same data; confirm.
summary(Arima(bit_ts_tran, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ts_tran 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0016
## s.e.  0.0006
## 
## sigma^2 estimated as 0.000653:  log likelihood=3626.59
## AIC=-7249.18   AICc=-7249.17   BIC=-7238.41
## 
## Training set error measures:
##                        ME       RMSE        MAE           MPE      MAPE
## Training set 3.788572e-06 0.02553818 0.01755831 -1.606423e-05 0.2328859
##                   MASE      ACF1
## Training set 0.9947251 0.2064681
## Daily return data from 2017-01-01 onwards
bit_ts_ret_past_2017 <- ts(
  as.matrix(
    select(
      arrange(filter(btc, Date >= as.Date('2017-01-01')), Date),
      WeightedPrice_return
    )
  )
)

# ARIMA(0,1,0) with drift on the return series, then residual diagnostics.
# NOTE(review): returns are already a differenced quantity, so d = 1 may
# over-difference the series - confirm this is intended.
bit_ts_ret_past_2017 %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0) with drift
## Q* = 262.58, df = 9, p-value < 2.2e-16
## 
## Model df: 1.   Total lags used: 10
# NOTE(review): the summary refits on bit_ret_ts (defined earlier), not on
# bit_ts_ret_past_2017 - presumably the same data; confirm.
summary(Arima(bit_ret_ts, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ret_ts 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0000
## s.e.  0.0011
## 
## sigma^2 estimated as 0.002042:  log likelihood=2707.03
## AIC=-5410.07   AICc=-5410.06   BIC=-5399.3
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE    MAPE      MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
##                    ACF1
## Training set -0.3749845
## Random Walk Test on Daily Data from 2020 onwards
# Fit the random walk with drift once and reuse it for both the residual
# diagnostics and the summary. The previous version summarised a model of
# bit_ret_ts here (copy-paste slip), so the summary did not describe the
# 2020+ series that the residual check was run on.
rw_2020_tr <- Arima(bit_ts_tran2, order = c(0, 1, 0), include.drift = TRUE)
checkresiduals(rw_2020_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0) with drift
## Q* = 32.082, df = 9, p-value = 0.0001927
## 
## Model df: 1.   Total lags used: 10
summary(rw_2020_tr)
# NOTE: the summary output below is stale - it was captured from the old call
# on bit_ret_ts. Re-run the script to refresh it.
## Series: bit_ret_ts 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0000
## s.e.  0.0011
## 
## sigma^2 estimated as 0.002042:  log likelihood=2707.03
## AIC=-5410.07   AICc=-5410.06   BIC=-5399.3
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE    MAPE      MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
##                    ACF1
## Training set -0.3749845
## Random Walk Test on Daily Data from 2021 onwards
# Fit the random walk with drift once and reuse it for both the residual
# diagnostics and the summary. The previous version summarised a model of
# bit_ret_ts here (copy-paste slip), so the summary did not describe the
# 2021 series that the residual check was run on.
rw_2021_tr <- Arima(bit_ts_tran3, order = c(0, 1, 0), include.drift = TRUE)
checkresiduals(rw_2021_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0) with drift
## Q* = 13.43, df = 9, p-value = 0.1441
## 
## Model df: 1.   Total lags used: 10
summary(rw_2021_tr)
# NOTE: the summary output below is stale - it was captured from the old call
# on bit_ret_ts. Re-run the script to refresh it.
## Series: bit_ret_ts 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0000
## s.e.  0.0011
## 
## sigma^2 estimated as 0.002042:  log likelihood=2707.03
## AIC=-5410.07   AICc=-5410.06   BIC=-5399.3
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE    MAPE      MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
##                    ACF1
## Training set -0.3749845
# Check errors
# Residuals of the random walk with drift fitted to the transformed prices
err_tr <- residuals(
  Arima(bit_ts_tran, order = c(0, 1, 0), include.drift = TRUE)
)
# Spread of the residuals (transformed scale)
cat('Standard Deviation = ', sd(err_tr))
## Standard Deviation =  0.02554609
# Average residual (transformed scale)
cat('Mean =', mean(err_tr))
## Mean = 3.788572e-06
# Invert a Box-Cox transform.
#
# ts_data: numeric vector, matrix, ts or data frame of transformed values.
# lambda:  Box-Cox parameter that was used for the forward transform.
# Returns an object shaped like `ts_data`, mapped back to the original scale.
invers_BoxCox <- function(ts_data, lambda) {
  # For lambda == 0 the forward transform is log(x), so the inverse is exp();
  # the general formula below would divide by zero in 1 / lambda.
  if (length(lambda) == 1L && isTRUE(lambda == 0)) {
    return(exp(ts_data))
  }
  (ts_data * lambda + 1)^(1 / lambda)
}

# Map the residual standard deviation back with the lambda estimated on bit_ts.
# NOTE(review): back-transforming a standard deviation does not in general
# give a standard deviation on the original scale - confirm the intent.
invers_BoxCox(ts_data = sd(err_tr), lambda = BoxCox.lambda(bit_ts))
## [1] 1.025888
# Daily Return Data
# Residuals of the random walk with drift fitted to the daily returns
err_ret <- residuals(
  Arima(bit_ret_ts, order = c(0, 1, 0), include.drift = TRUE)
)
cat('Standard Deviation = ', sd(err_ret))
## Standard Deviation =  0.04517554
cat('Mean =', mean(err_ret))
## Mean = 2.741485e-08
# NOTE(review): bit_ret_ts is modelled here without a Box-Cox transform and
# BoxCox.lambda warns that it expects strictly positive data, so this
# back-transformed value is probably not meaningful - confirm.
invers_BoxCox(ts_data = sd(err_ret), lambda = BoxCox.lambda(bit_ret_ts))
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
## [1] 1.045235
# Forecast with ARIMA
## h is the forecast horizon, i.e. the number of days to predict

# Forecast the Bitcoin weighted price with an automatically selected ARIMA.
#
# bitcoin_data: data frame with a `Date` column and a `WeightedPrice` column.
# h:            forecast horizon in days (single number >= 1).
#
# Only rows from 2017-01-01 onwards are used. The price is Box-Cox
# transformed, modelled with auto.arima(), forecast h steps ahead and
# back-transformed with invers_BoxCox().
#
# Returns a tibble with h rows: the point forecast (`mean`), the lower/upper
# interval bounds as produced by forecast(), and the forecast `Date`.
fit_model <- function(bitcoin_data, h) {
  stopifnot(is.numeric(h), length(h) == 1L, h >= 1)

  bitcoin_df <- bitcoin_data %>%
    filter(Date >= as.Date('2017-01-01')) %>%
    arrange(Date)

  time_series <- bitcoin_df %>%
    select(WeightedPrice) %>%
    ts()

  # Estimate lambda once so the forward and inverse transforms always agree
  lambda <- BoxCox.lambda(time_series)

  predictions <- time_series %>%
    BoxCox(lambda = lambda) %>%
    auto.arima() %>%
    forecast(h)

  # Select the forecast components by name instead of by list position, so
  # the code does not depend on the internal layout of the forecast object
  forecast_df <- cbind(data.frame(predictions["mean"]),
                       data.frame(predictions["lower"]),
                       data.frame(predictions["upper"]))

  the_forecast <- invers_BoxCox(forecast_df, lambda = lambda)

  # Forecast dates are the h days after the last observation. The previous
  # `tail(Date, h) + h` only gave this for gap-free daily data and failed
  # when h exceeded the number of rows.
  the_forecast %>%
    mutate(Date = max(bitcoin_df$Date) + seq_len(h)) %>%
    as_tibble()
}



# Load the refreshed BTC price dataset

bitcoin_new <- read_csv(file = 'btc_base_dataset_NEW.csv')
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   Date = col_date(format = ""),
##   Day_of_Week = col_character()
## )
## i Use `spec()` for the full column specifications.
# Interactive line chart of the refreshed weighted price
ggplotly(
  ggplot(bitcoin_new, aes(Date, WeightedPrice)) +
    geom_line(col = '#ffa500') +
    labs(title = 'Bitcoin Weighted Prices 2014 -2021 (new)', x = '') +
    scale_y_continuous(
      breaks = c(0, 5000, 10000, 15000, 30000, 60000),
      labels = c('$0', '$5,000', '$10,000', '$15,000', '$30,000', '$60,000')
    ) +
    my_theme
)
## 30-day BTC price forecast, overlaid on the historical and refreshed prices

ggplotly(
  ggplot(fit_model(bitcoin, 30), aes(x = Date, y = mean)) +
    # point forecast
    geom_line(col = '#ff2500') +
    # 80% and 95% interval bands
    geom_ribbon(aes(ymin = lower.80., ymax = upper.80.), alpha = .3,
                fill = '#ffc04c') +
    geom_ribbon(aes(ymin = lower.95., ymax = upper.95.), alpha = .3,
                fill = '#ffe4b2') +
    # refreshed data (default colour) and the data used for fitting (orange)
    geom_line(data = bitcoin_new, aes(Date, WeightedPrice)) +
    geom_line(data = filter(bitcoin, Date >= as.Date('2015-01-01')),
              aes(Date, WeightedPrice), col = '#ffa500') +
    my_theme +
    labs(title = 'Bitcoin Prediction of 30 Days', y = 'Price', x = '') +
    # dollar-labelled axis ticks every $5,000 from $0 to $60,000
    scale_y_continuous(
      breaks = seq(0, 60000, by = 5000),
      labels = paste0('$', formatC(seq(0, 60000, by = 5000), format = 'd',
                                   big.mark = ','))
    )
)